# Flat key/value settings for a single run. The code that reads this file is
# not visible here, so the notes below are inferred from key names only.
# NOTE(review): confirm each meaning against the consumer before relying on it.
# presumably the per-device (micro) batch size — TODO confirm
batchsize = 1
# presumably the global batch size; how it relates to batchsize and
# accumulate_steps depends on the device count, which is not set here — TODO confirm
gbs = 512
# presumably the sequence length in tokens — TODO confirm units
seqlength = 8192
# presumably the length inputs are padded to; currently equal to seqlength
padlength = 8192
# presumably the numeric precision of the run (looks like bfloat16) — TODO confirm accepted values
precision = 'bf16'
# presumably the tensor-parallel degree — TODO confirm
tensor_parallel = 1
# presumably the pipeline-parallel degree (number of stages) — TODO confirm
pipeline_parallel = 2
# presumably the number of gradient-accumulation steps per optimizer step — TODO confirm
accumulate_steps = 1
# NOTE(review): -1 looks like a sentinel (unset / determined elsewhere?) rather
# than a real FLOPS figure — confirm how the consumer treats it
theoryflops = -1
# presumably the number of passes over the dataset — TODO confirm
epochs = 1